#### setting environment ####
require(quanteda)
require(stm)
require(stringi)
require(runjags)
require(coda)

newspaper.names <- c("Asahi", "Chugoku", "Chunichi", "Hokkaido", "Kahoku", 
                     "Mainichi", "Nikkei", "Nishinippon", "Sankei", "Yomiuri")

#### data preparation ####
# document-feature matrix of newspaper editorials published from October 1, 2017 to September 30, 2018
load("Study2_dfm.Rdata")
# estimation results of the Wordfish models
load("STM_result_65.Rdata")

#### estimate topic-specific positions by the Wordfish, in which each editorial is classified into one topic category ####
## detect topic IDs of each editorial
editorial.topics <- apply(STM.result$theta, 1, function(x) which(x == max(x)))

## estimate the Wordfish model for each topic
topic.specific.positions <- matrix(NA, 10, 65)
rownames(topic.specific.positions) <- newspaper.names
wordfish.result <- list()
for (i in 1:65) {
  # document-feature matrix for topic i
  Wordfish.dfm <- dfm_subset(Study2.dfm, editorial.topics == i)
  # compress the document-feature matrix at the newspaper level
  Wordfish.dfm <- dfm(Wordfish.dfm, groups = "newspaper")
  # remove words not used by two or more newspapers
  Wordfish.dfm <- dfm_trim(Wordfish.dfm, min_docfreq = 2, docfreq_type = "count")
  # estimatee Wordfish model
  wordfish.result[[i]] <- textmodel_wordfish(Wordfish.dfm, sparse = TRUE)
  # record topic-specific positions of the newspapers
  newspaper.ID <- match(Wordfish.dfm@Dimnames$docs, newspaper.names)
  topic.specific.positions[newspaper.ID, i] <- wordfish.result[[i]]$theta
}

#### preliminary factor analysis to impose a sign restriction ####
# function to create initial values
initial.fun.preliminary <- function(seed1, seed2) {
  set.seed(seed1)
  b <- rnorm(65)
  x <- rnorm(10, 0, 0.01)
  p <- runif(65, 0.5, 1)
  list(beta = b, x = x, inv.phi = p, 
       .RNG.name = "base::Wichmann-Hill", .RNG.seed = seed2)
}
# create initial values
initial.values.preliminary <- initial.fun.preliminary(12345, 67890)
# estimate a factor analysis model using JAGS
FA.result.preliminary <- run.jags(model = "factor_analysis_JAGS_preliminary.R", 
                                  monitor = c("x", "beta", "phi"), 
                                  data = list(y = topic.specific.positions, n = 10, m = 65), 
                                  inits = initial.values.preliminary, 
                                  n.chains = 1, burnin = 5000, sample = 1000, modules = "glm", 
                                  summarise = FALSE, thin = 10, method = "parallel")
## fix the signs of factor loadings of some items so that Chunichi's ideal point becomes negative
# whether Chunichi's ideal point was estimated to be negative
Chunichi.score.sign.preliminary <- sign(mean(as.matrix(FA.result.preliminary$mcmc[, 3]))) == -1
# posterior means of factor loadings
post.loadings.preliminary <- colMeans(as.matrix(FA.result.preliminary$mcmc[, 11:(10 + i)]))
# IDs of five topics whose factor loadings should be positive to make Chunichi's ideal point negative
positive.items <- order(post.loadings.preliminary, decreasing = Chunichi.score.sign.preliminary)[1:5]
# IDs of other topics
unconstrained.items <- c(1:65)[-positive.items]

#### main factor analysis ####
# function to create initial values
initial.fun <- function(seed1, seed2) {
  set.seed(seed1)
  b <- rnorm(65)
  b[positive.items] <- runif(5, 10, 15)
  x <- rnorm(10, 0, 0.01)
  p <- runif(65, 0.5, 1)
  list(beta = b, x = x, inv.phi = p,
       .RNG.name = "base::Wichmann-Hill", .RNG.seed = seed2)
}
# create initial values
initial.values <- list()
for (j in 1:3) {
  initial.values[[j]] <- initial.fun(100 * j, 1000 * j)
}
# estimate a factor analysis model using JAGS
FA.result <- run.jags(model = "factor_analysis_JAGS.R", 
                      monitor = c("x", "beta", "phi"), 
                      data = list(y = topic.specific.positions, n = 10, m = 65, 
                                  positive.items = positive.items, 
                                  unconstrained.items = unconstrained.items), 
                      inits = initial.values, 
                      n.chains = 3, burnin = 5000, sample = 5000, modules = "glm", 
                      summarise = FALSE, thin = 20, method = "parallel")
# convergence check
print(which(gelman.diag(FA.result$mcmc, multivariate = FALSE)[[1]][, 1] > 1.1))

#### newspapers' ideal points covering various issues (Figure A.6) ####
# posterior draws of ideal points
ideal.points.draws <- t(rbind(FA.result$mcmc[[1]][, 1:10], 
                              FA.result$mcmc[[2]][, 1:10], 
                              FA.result$mcmc[[3]][, 1:10])) * 10
rownames(ideal.points.draws) <- newspaper.names

## summary statistics
# point estimates (posterior means)
ideal.points.mean <- rowMeans(ideal.points.draws)
# 95% credible intervals (highest posterior density intervals)
ideal.points.CI <- HPDinterval(as.mcmc(t(ideal.points.draws)))
# ascending order of the point estimates
ideal.points.order <- order(ideal.points.mean, decreasing = TRUE)

## draw the figure
cairo_pdf("Figure_A6.pdf", 
          width = 5, height = 6, pointsize = 11, family = "Helvetica")
par(mar = c(5, 1, 1, 1))
dotchart(ideal.points.mean[ideal.points.order], pch = 19, xlim = c(-1.1, 1.6), 
         xlab = "Estimated Ideal Points")
segments(ideal.points.CI[ideal.points.order, 1], 1:10, 
         ideal.points.CI[ideal.points.order, 2], 1:10)
dev.off()